home *** CD-ROM | disk | FTP | other *** search
/ MacHack 2001 / MacHack 2001.toast / pc / Sessions / Traut / ZStrings / Source / CrossPlatform / ZStringParser.cpp < prev    next >
Encoding:
C/C++ Source or Header  |  2001-06-23  |  31.9 KB  |  1,186 lines

  1. /*==================================================================
  2.     File:        ZStringParser.cpp
  3.     
  4.     Contains:    Class for parsing named ZStrings into platform-
  5.                 specific strings.
  6.  
  7.     Written by:    Eric Traut
  8.     
  9.     Copyright:    2000-2001 Connectix Corporation
  10.     
  11.     This source has been placed into the public domain by
  12.     Connectix Corporation. You have the right to modify, 
  13.     distribute or use this code without any legal limitations
  14.     or financial/licensing requirements. Connectix is not 
  15.     liable for any problems that result from the use of this 
  16.     code.
  17.     
  18.     If you have comments, feedback, questions, or would like
  19.     to submit bug fixes or updates to this code, please email
  20.     opensource@connectix.com.
  21. ==================================================================*/
  22.  
  23. #include "ZStringParser.h"
  24. #include "ZStringDictionary.h"
  25. #include "ZString.h"
  26.  
  27. #include <stdio.h>
  28. #include <ctype.h>
  29.  
  30. ZStringParser *            ZStringParser::sParser = NULL;
  31.  
  32. typedef struct
  33. {
  34.     const char *    fTagName;                            // Tag name (alpha representation)
  35.     ZStringTagID    fTagID;                                // Internal tag ID
  36.     Z_Boolean        fCheckForPartialTag;                // Indicates that the parser should check for a tag missing &
  37. } ZStringParseTag;
  38.  
  39.  
  40. // WARNING: For performance reasons, we use a binary
  41. // search lookup, so the following table must remain
  42. // sorted alphabetically. if you add more items,
  43. // make sure to add them in the appropriate place.
  44. static const ZStringParseTag    sParseTags[] = {
  45.     {"À",            kZTag_Agrave,        true        },
  46.     {"Å",            kZTag_Aring,        false        },
  47.     {"Ã",            kZTag_Atilde,        true        },
  48.     {"Ä",            kZTag_Auml,            true        },
  49.     {"Ç",            kZTag_Ccedil,        true        },
  50.     {"É",            kZTag_Eacute,        true        },
  51.     {"Ê",            kZTag_Ecirc,        true        },
  52.     {"È",            kZTag_Egrave,        true        },
  53.     {"Ë",            kZTag_Euml,            true        },
  54.     {"Í",            kZTag_Iacute,        true        },
  55.     {"Î",            kZTag_Icirc,        true        },
  56.     {"Ì",            kZTag_Igrave,        true        },
  57.     {"Ï",            kZTag_Iuml,            true        },
  58.     {"Ñ",            kZTag_Ntilde,        true        },
  59.     {"Ó",            kZTag_Oacute,        true        },
  60.     {"Ô",            kZTag_Ocirc,        true        },
  61.     {"Ò",            kZTag_Ograve,        true        },
  62.     {"Ø",            kZTag_Oslash,        true        },
  63.     {"Õ",            kZTag_Otilde,        true        },
  64.     {"Ö",            kZTag_Ouml,            true        },
  65.     {"Ú",            kZTag_Uacute,        true        },
  66.     {"Û",            kZTag_Ucirc,        true        },
  67.     {"Ù",            kZTag_Ugrave,        true        },
  68.     {"Ü",            kZTag_Uuml,            true        },
  69.     {"á",            kZTag_aacute,        true        },
  70.     {"â",            kZTag_acirc,        true        },
  71.     {"æ",            kZTag_aelig,        true        },
  72.     {"à",            kZTag_agrave,        true        },
  73.     {"&",            kZTag_amp,            false        },
  74.     {"å",            kZTag_aring    ,        false        },
  75.     {"ã",            kZTag_atilde,        true        },
  76.     {"ä",            kZTag_auml,            true        },
  77.     {"&bdquo",            kZTag_bdquo,        true        },
  78.     {"&bull",            kZTag_bull,            false        },
  79.     {"ç",            kZTag_ccedil,        true        },
  80.     {"¢",            kZTag_cent,            false        },
  81.     {"©",            kZTag_copy,            false        },
  82.     {"é",            kZTag_eacute,        true        },
  83.     {"ê",            kZTag_ecirc,        true        },
  84.     {"è",            kZTag_egrave,        true        },
  85.     {"ë",            kZTag_euml,            true        },
  86.     {">",                kZTag_gt,            false        },
  87.     {"&hellip",            kZTag_hellip,        false        },
  88.     {"í",            kZTag_iacute,        true        },
  89.     {"î",            kZTag_icirc,        true        },
  90.     {"¡",            kZTag_iexcl,        true        },
  91.     {"ì",            kZTag_igrave,        true        },
  92.     {"¿",            kZTag_iquest,        true        },
  93.     {"ï",            kZTag_iuml,            true        },
  94.     {"&ldquo",            kZTag_ldquo,        true        },
  95.     {"&lsquo",            kZTag_lsquo,        true        },
  96.     {"<",                kZTag_lt,            false        },
  97.     {"&mdash",            kZTag_mdash,        true        },
  98.     {"µ",            kZTag_micro,        false        },
  99.     {" ",            kZTag_nbsp,            true        },
  100.     {"&ndash",            kZTag_ndash,        true        },
  101.     {"ñ",            kZTag_ntilde,        true        },
  102.     {"ó",            kZTag_oacute,        true        },
  103.     {"ô",            kZTag_ocirc,        true        },
  104.     {"ò",            kZTag_ograve,        true        },
  105.     {"ø",            kZTag_oslash,        true        },
  106.     {"õ",            kZTag_otilde,        true        },
  107.     {"ö",            kZTag_ouml,            true        },
  108.     {"¶",            kZTag_para,            false        },
  109.     {"&pi",                kZTag_pi,            false        },
  110.     {"£",            kZTag_pound,        false        },
  111.     {"&rdquo",            kZTag_rdquo,        true        },
  112.     {"®",            kZTag_reg,            false        },
  113.     {"&replace",        kZTag_replace,        true        },
  114.     {"&rsquo",            kZTag_rsquo,        true        },
  115.     {"&sbquo",            kZTag_sbquo,        true        },
  116.     {"ß",            kZTag_szlig,        true        },
  117.     {"&trade",            kZTag_trade,        false        },
  118.     {"ú",            kZTag_uacute,        true        },
  119.     {"û",            kZTag_ucirc,        true        },
  120.     {"ù",            kZTag_ugrave,        true        },
  121.     {"ü",            kZTag_uuml,            true        },
  122.     {"¥",            kZTag_yen,            false        },
  123.     {"ÿ",            kZTag_yuml,            true        },
  124.     {"<br>",            kZTag_br,            true        }
  125. };
  126.  
  127. static const ZStringTagID    sNumericParseTags[] = {
  128.             kZTag_Invalid,                // 000
  129.             kZTag_Invalid,                // 001
  130.             kZTag_Invalid,                // 002
  131.             kZTag_Invalid,                // 003
  132.             kZTag_Invalid,                // 004
  133.             kZTag_Invalid,                // 005
  134.             kZTag_Invalid,                // 006
  135.             kZTag_Invalid,                // 007
  136.             kZTag_Invalid,                // 008
  137.             kZTag_Invalid,                // 009
  138.  
  139.             kZTag_Invalid,                // 010
  140.             kZTag_Invalid,                // 011
  141.             kZTag_Invalid,                // 012
  142.             kZTag_Invalid,                // 013
  143.             kZTag_Invalid,                // 014
  144.             kZTag_Invalid,                // 015
  145.             kZTag_Invalid,                // 016
  146.             kZTag_Invalid,                // 017
  147.             kZTag_Invalid,                // 018
  148.             kZTag_Invalid,                // 019
  149.  
  150.             kZTag_Invalid,                // 020
  151.             kZTag_Invalid,                // 021
  152.             kZTag_Invalid,                // 022
  153.             kZTag_Invalid,                // 023
  154.             kZTag_Invalid,                // 024
  155.             kZTag_Invalid,                // 025
  156.             kZTag_Invalid,                // 026
  157.             kZTag_Invalid,                // 027
  158.             kZTag_Invalid,                // 028
  159.             kZTag_Invalid,                // 029
  160.  
  161.             kZTag_Invalid,                // 030
  162.             kZTag_Invalid,                // 031
  163.             kZTag_Invalid,                // 032
  164.             kZTag_Invalid,                // 033
  165.             kZTag_Invalid,                // 034
  166.             kZTag_Invalid,                // 035
  167.             kZTag_Invalid,                // 036
  168.             kZTag_Invalid,                // 037
  169.             kZTag_amp,                    // 038
  170.             kZTag_Invalid,                // 039
  171.  
  172.             kZTag_Invalid,                // 040
  173.             kZTag_Invalid,                // 041
  174.             kZTag_Invalid,                // 042
  175.             kZTag_Invalid,                // 043
  176.             kZTag_Invalid,                // 044
  177.             kZTag_Invalid,                // 045
  178.             kZTag_Invalid,                // 046
  179.             kZTag_Invalid,                // 047
  180.             kZTag_Invalid,                // 048
  181.             kZTag_Invalid,                // 049
  182.  
  183.             kZTag_Invalid,                // 050
  184.             kZTag_Invalid,                // 051
  185.             kZTag_Invalid,                // 052
  186.             kZTag_Invalid,                // 053
  187.             kZTag_Invalid,                // 054
  188.             kZTag_Invalid,                // 055
  189.             kZTag_Invalid,                // 056
  190.             kZTag_Invalid,                // 057
  191.             kZTag_Invalid,                // 058
  192.             kZTag_Invalid,                // 059
  193.  
  194.             kZTag_lt,                    // 060
  195.             kZTag_Invalid,                // 061
  196.             kZTag_gt,                    // 062
  197.             kZTag_Invalid,                // 063
  198.             kZTag_Invalid,                // 064
  199.             kZTag_Invalid,                // 065
  200.             kZTag_Invalid,                // 066
  201.             kZTag_Invalid,                // 067
  202.             kZTag_Invalid,                // 068
  203.             kZTag_Invalid,                // 069
  204.  
  205.             kZTag_Invalid,                // 070
  206.             kZTag_Invalid,                // 071
  207.             kZTag_Invalid,                // 072
  208.             kZTag_Invalid,                // 073
  209.             kZTag_Invalid,                // 074
  210.             kZTag_Invalid,                // 075
  211.             kZTag_Invalid,                // 076
  212.             kZTag_Invalid,                // 077
  213.             kZTag_Invalid,                // 078
  214.             kZTag_Invalid,                // 079
  215.  
  216.             kZTag_Invalid,                // 080
  217.             kZTag_Invalid,                // 081
  218.             kZTag_Invalid,                // 082
  219.             kZTag_Invalid,                // 083
  220.             kZTag_Invalid,                // 084
  221.             kZTag_Invalid,                // 085
  222.             kZTag_Invalid,                // 086
  223.             kZTag_Invalid,                // 087
  224.             kZTag_Invalid,                // 088
  225.             kZTag_Invalid,                // 089
  226.  
  227.             kZTag_Invalid,                // 090
  228.             kZTag_Invalid,                // 091
  229.             kZTag_Invalid,                // 092
  230.             kZTag_Invalid,                // 093
  231.             kZTag_Invalid,                // 094
  232.             kZTag_Invalid,                // 095
  233.             kZTag_Invalid,                // 096
  234.             kZTag_Invalid,                // 097
  235.             kZTag_Invalid,                // 098
  236.             kZTag_Invalid,                // 099
  237.  
  238.             kZTag_Invalid,                // 100
  239.             kZTag_Invalid,                // 101
  240.             kZTag_Invalid,                // 102
  241.             kZTag_Invalid,                // 103
  242.             kZTag_Invalid,                // 104
  243.             kZTag_Invalid,                // 105
  244.             kZTag_Invalid,                // 106
  245.             kZTag_Invalid,                // 107
  246.             kZTag_Invalid,                // 108
  247.             kZTag_Invalid,                // 109
  248.  
  249.             kZTag_Invalid,                // 110
  250.             kZTag_Invalid,                // 111
  251.             kZTag_pi,                    // 112
  252.             kZTag_Invalid,                // 113
  253.             kZTag_Invalid,                // 114
  254.             kZTag_Invalid,                // 115
  255.             kZTag_Invalid,                // 116
  256.             kZTag_Invalid,                // 117
  257.             kZTag_Invalid,                // 118
  258.             kZTag_Invalid,                // 119
  259.  
  260.             kZTag_Invalid,                // 120
  261.             kZTag_Invalid,                // 121
  262.             kZTag_Invalid,                // 122
  263.             kZTag_Invalid,                // 123
  264.             kZTag_Invalid,                // 124
  265.             kZTag_Invalid,                // 125
  266.             kZTag_Invalid,                // 126
  267.             kZTag_Invalid,                // 127
  268.             kZTag_Invalid,                // 128
  269.             kZTag_Invalid,                // 129
  270.  
  271.             kZTag_Invalid,                // 130
  272.             kZTag_Invalid,                // 131
  273.             kZTag_bdquo,                // 132
  274.             kZTag_hellip,                // 133
  275.             kZTag_Invalid,                // 134
  276.             kZTag_Invalid,                // 135
  277.             kZTag_Invalid,                // 136
  278.             kZTag_Invalid,                // 137
  279.             kZTag_Invalid,                // 138
  280.             kZTag_Invalid,                // 139
  281.  
  282.             kZTag_Invalid,                // 140
  283.             kZTag_Invalid,                // 141
  284.             kZTag_Invalid,                // 142
  285.             kZTag_Invalid,                // 143
  286.             kZTag_Invalid,                // 144
  287.             kZTag_lsquo,                // 145
  288.             kZTag_rsquo,                // 146
  289.             kZTag_ldquo,                // 147
  290.             kZTag_rdquo,                // 148
  291.             kZTag_bull,                    // 149
  292.  
  293.             kZTag_ndash,                // 150
  294.             kZTag_mdash,                // 151
  295.             kZTag_Invalid,                // 152
  296.             kZTag_trade,                // 153
  297.             kZTag_Invalid,                // 154
  298.             kZTag_Invalid,                // 155
  299.             kZTag_Invalid,                // 156
  300.             kZTag_Invalid,                // 157
  301.             kZTag_Invalid,                // 158
  302.             kZTag_Invalid,                // 159
  303.  
  304.             kZTag_nbsp,                    // 160
  305.             kZTag_iexcl,                // 161
  306.             kZTag_cent,                    // 162
  307.             kZTag_pound,                // 163
  308.             kZTag_Invalid,                // 164
  309.             kZTag_yen,                    // 165
  310.             kZTag_Invalid,                // 166
  311.             kZTag_Invalid,                // 167
  312.             kZTag_Invalid,                // 168
  313.             kZTag_copy,                    // 169
  314.  
  315.             kZTag_Invalid,                // 170
  316.             kZTag_Invalid,                // 171
  317.             kZTag_Invalid,                // 172
  318.             kZTag_Invalid,                // 173
  319.             kZTag_reg,                    // 174
  320.             kZTag_Invalid,                // 175
  321.             kZTag_Invalid,                // 176
  322.             kZTag_Invalid,                // 177
  323.             kZTag_Invalid,                // 178
  324.             kZTag_Invalid,                // 179
  325.  
  326.             kZTag_Invalid,                // 180
  327.             kZTag_micro,                // 181
  328.             kZTag_para,                    // 182
  329.             kZTag_Invalid,                // 183
  330.             kZTag_Invalid,                // 184
  331.             kZTag_Invalid,                // 185
  332.             kZTag_Invalid,                // 186
  333.             kZTag_Invalid,                // 187
  334.             kZTag_Invalid,                // 188
  335.             kZTag_Invalid,                // 189
  336.  
  337.             kZTag_Invalid,                // 190
  338.             kZTag_iquest,                // 191
  339.             kZTag_Agrave,                // 192
  340.             kZTag_Aacute,                // 193
  341.             kZTag_Acirc,                // 194
  342.             kZTag_Atilde,                // 195
  343.             kZTag_Auml,                    // 196
  344.             kZTag_Aring,                // 197
  345.             kZTag_AElig,                // 198
  346.             kZTag_Ccedil,                // 199
  347.  
  348.             kZTag_Egrave,                // 200
  349.             kZTag_Eacute,                // 201
  350.             kZTag_Ecirc,                // 202
  351.             kZTag_Euml,                    // 203
  352.             kZTag_Igrave,                // 204
  353.             kZTag_Iacute,                // 205
  354.             kZTag_Icirc,                // 206
  355.             kZTag_Iuml,                    // 207
  356.             kZTag_Invalid,                // 208
  357.             kZTag_Ntilde,                // 209
  358.  
  359.             kZTag_Ograve,                // 210
  360.             kZTag_Oacute,                // 211
  361.             kZTag_Ocirc,                // 212
  362.             kZTag_Otilde,                // 213
  363.             kZTag_Ouml,                    // 214
  364.             kZTag_Invalid,                // 215
  365.             kZTag_Oslash,                // 216
  366.             kZTag_Ugrave,                // 217
  367.             kZTag_Uacute,                // 218
  368.             kZTag_Ucirc,                // 219
  369.  
  370.             kZTag_Uuml,                    // 220
  371.             kZTag_Invalid,                // 221
  372.             kZTag_Invalid,                // 222
  373.             kZTag_szlig,                // 223
  374.             kZTag_agrave,                // 224
  375.             kZTag_aacute,                // 225
  376.             kZTag_acirc,                // 226
  377.             kZTag_atilde,                // 227
  378.             kZTag_auml,                    // 228
  379.             kZTag_aring,                // 229
  380.  
  381.             kZTag_aelig,                // 230
  382.             kZTag_ccedil,                // 231
  383.             kZTag_egrave,                // 232
  384.             kZTag_eacute,                // 233
  385.             kZTag_ecirc,                // 234
  386.             kZTag_euml,                    // 235
  387.             kZTag_igrave,                // 236
  388.             kZTag_iacute,                // 237
  389.             kZTag_icirc,                // 238
  390.             kZTag_iuml,                    // 239
  391.  
  392.             kZTag_Invalid,                // 240
  393.             kZTag_ntilde,                // 241
  394.             kZTag_ograve,                // 242
  395.             kZTag_oacute,                // 243
  396.             kZTag_ocirc,                // 244
  397.             kZTag_otilde,                // 245
  398.             kZTag_ouml,                    // 246
  399.             kZTag_Invalid,                // 247
  400.             kZTag_oslash,                // 248
  401.             kZTag_ugrave,                // 249
  402.  
  403.             kZTag_uacute,                // 250
  404.             kZTag_ucirc,                // 251
  405.             kZTag_uuml,                    // 252
  406.             kZTag_Invalid,                // 253
  407.             kZTag_Invalid,                // 254
  408.             kZTag_yuml                    // 255
  409. };
  410.  
  411. /*------------------------------------------------------------------
  412.     ZToolOptions
  413.  
  414.     The defualt constructor contains the default values for the tool.
  415. ------------------------------------------------------------------*/
  416.  
  417. ZToolOptions::ZToolOptions()
  418. {
  419.     // default settings
  420.     mOutputNumericTags = true;
  421.     mCategorizeOutput = true;
  422.     mAllowTagSemicolon = false;
  423.     mHasOTags = false;
  424.     mOutputWarnings = true;
  425.     mConvertHighASCIIChar = true;
  426.     mPrintErrorsOnly = false;
  427.     mFlagDuplicates = false;
  428. }
  429.  
  430. /*------------------------------------------------------------------
  431.     ZToolOptions
  432.  
  433.     This constructor sets most of the variables based upon the input 
  434.     values.  Two variables, mHasOTags and mPrintErrorsOnly, are 
  435.     exceptions because they are only set to true in a particular 
  436.     instance and do not depend on the user's option choices.
  437. ------------------------------------------------------------------*/
  438.  
  439. ZToolOptions::ZToolOptions(
  440.     Z_Boolean                inOutputNumeric,
  441.     Z_Boolean                inCategorizeOutput,
  442.     Z_Boolean                inAllowSemicolon,
  443.     Z_Boolean                inFlagDuplicates,
  444.     Z_Boolean                inOutputWarnings,
  445.     Z_Boolean                inConvertHighASCIIChar)
  446. {
  447.     mOutputNumericTags = inOutputNumeric;
  448.     mCategorizeOutput = inCategorizeOutput;
  449.     mAllowTagSemicolon = inAllowSemicolon;
  450.     mFlagDuplicates = inFlagDuplicates;
  451.     mOutputWarnings = inOutputWarnings;
  452.     mConvertHighASCIIChar = inConvertHighASCIIChar;
  453.     mHasOTags = false;
  454.     mPrintErrorsOnly = false;
  455. }
  456.  
  457. /*------------------------------------------------------------------
  458.     ZStringParser
  459. ------------------------------------------------------------------*/
  460.  
  461. ZStringParser::ZStringParser()
  462. {
  463.     // This is a singleton class
  464.     check(sParser == NULL);
  465.     sParser = this;
  466.     
  467.     // By default, the two-byte table is all zeros.
  468.     memset(mTwoByteTable, 0, sizeof(mTwoByteTable));
  469. }
  470.  
  471.  
  472. /*------------------------------------------------------------------
  473.     OverrideTwoByteTable
  474. ------------------------------------------------------------------*/
  475.  
  476. void
  477. ZStringParser::OverrideTwoByteTable(
  478.     Z_UInt8 *                    inNewTable)
  479. {
  480.     memcpy(mTwoByteTable, inNewTable, sizeof(mTwoByteTable));
  481. }
  482.  
  483.  
  484. /*------------------------------------------------------------------
  485.     SkipOverSpaces
  486.  
  487.     This method skips over all spaces.
  488. ------------------------------------------------------------------*/
  489.  
  490. const char *
  491. ZStringParser::SkipOverSpaces(
  492.     const char *              inCurrentPtr)
  493. {
  494.     const char * newPtr = inCurrentPtr;
  495.     while (isspace(*newPtr))
  496.         newPtr++;
  497.     return newPtr;
  498. }
  499.  
  500.  
  501. /*------------------------------------------------------------------
  502.     ParseAdditionalParameters
  503.  
  504.     This method parses additional parameters contained in the ZString
  505.     tag.  Currently, there is only the limit parameter, in the format
  506.     limit=# where # may be an arbitrary length.  The limit parameter
  507.     sets the maximum data size allowed for the ZString.
  508. ------------------------------------------------------------------*/
  509.  
  510. Z_Boolean
  511. ZStringParser::ParseAdditionalParameters(
  512.     const char *                inCurrentPtr,
  513.     ZStringParseInfo &            outParseInfo)
  514. {
  515.     const char * startOfTagPtr;
  516.     const char * currentNumPtr;
  517.     const char * equalSignPtr;
  518.  
  519.     startOfTagPtr = SkipOverSpaces(inCurrentPtr);
  520.  
  521.     if (*startOfTagPtr == '>')                                // Found the end marker
  522.         return true;
  523.  
  524.     // Parse a limit parameter
  525.     if (strncmp(startOfTagPtr, "limit=", 6) == 0)            // Found a limit tag
  526.     {
  527.         currentNumPtr = strpbrk(startOfTagPtr, " >") - 1;    // Go to the last number
  528.  
  529.         Z_UInt16 numericTagValue = 0;
  530.         Z_UInt16 multiple = 1;
  531.  
  532.         equalSignPtr = startOfTagPtr + 5;
  533.  
  534.         // Parse the number by starting at the end and working back to the equal sign
  535.         while (currentNumPtr > equalSignPtr)
  536.         {
  537.             if (*currentNumPtr < '0' || *currentNumPtr > '9')
  538.                 return false;
  539.             numericTagValue += (*currentNumPtr - '0') * multiple;
  540.             multiple *= 10;
  541.             currentNumPtr--;
  542.         }
  543.  
  544.         outParseInfo.fHasMaxDataLen = true;
  545.         outParseInfo.fMaxDataLen = numericTagValue;
  546.  
  547.         return true;
  548.     }
  549.  
  550.     return false;        // None ofthe parameters matched the ones listed here
  551. }
  552.  
  553.  
  554. /*------------------------------------------------------------------
  555.     ParseNamedString
  556. ------------------------------------------------------------------*/
  557.  
  558. Z_Boolean
  559. ZStringParser::ParseNamedString(
  560.     const char *                inNamedString,
  561.     ZStringParseInfo &            outParseInfo,
  562.     Z_Boolean                    inDataIsVolatile)
  563. {
  564.     const char * startOfName;
  565.     const char * endOfName;
  566.     const char * endTag;
  567.     const char * expectedEndTag;
  568.  
  569.     // Until we see otherwise, assume the
  570.     // string is valid.
  571.     outParseInfo.fValidNamedString = true;
  572.  
  573.     // Set the volatile flag
  574.     outParseInfo.fIsVolatile = inDataIsVolatile;
  575.  
  576.     // Set the limit flag (assumed to not be limited)
  577.     outParseInfo.fHasMaxDataLen = false;
  578.  
  579.     // Make sure it starts with a valid tag.
  580.     if (inNamedString[0] != '<')
  581.     {
  582.         debug_str("Badly-formed named string");
  583.         goto BadEndTag;
  584.     }
  585.  
  586.     startOfName = SkipOverSpaces(inNamedString+1);
  587.  
  588.     // Decide which ending should be expected
  589.     expectedEndTag = "</Z>";
  590.     if (strncmp(startOfName, "Z name=", 7) != 0)
  591.     {
  592.         expectedEndTag = "</O>";
  593.         if (strncmp(startOfName, "O name=", 7) != 0)
  594.         {
  595.             debug_str("Badly-formed named string");
  596.             goto BadEndTag;
  597.         }
  598.     }
  599.     
  600.     outParseInfo.fNamedStringStart = inNamedString;
  601.     outParseInfo.fNameStr = &inNamedString[8];
  602.     
  603.     // Scan for a space or a right brace. This signals the end
  604.     // of the tag or additional parameters.
  605.     endOfName = strpbrk(outParseInfo.fNameStr, " >");
  606.     if (endOfName == NULL)
  607.     {
  608.         debug_str("Badly-formed named string");
  609.         goto BadStringName;
  610.     }
  611.     
  612.     // This part parses additional parameters in the tag
  613.     while (*endOfName == ' ')
  614.     {
  615.         if(!ParseAdditionalParameters(endOfName, outParseInfo))
  616.             goto BadStringName;
  617.  
  618.         endOfName = strpbrk(endOfName + 1, " >");   // Find the next space or >
  619.         if (endOfName == NULL)                        // It is an error if it can't find either
  620.         {
  621.             debug_str("Badly-formed named string");
  622.             goto BadStringName;
  623.         }
  624.     }
  625.  
  626.     if (*endOfName != '>')
  627.     {
  628.         debug_str("Badly-formed named string");
  629.         goto BadStringName;
  630.     }
  631.     
  632.     outParseInfo.fNameStrLen = endOfName - outParseInfo.fNameStr;
  633.     outParseInfo.fValueStr = endOfName + 1;
  634.  
  635.     // Make sure the string is closed off with an end tag.
  636.     endTag = strstr(outParseInfo.fValueStr, expectedEndTag);
  637.     if (endTag == NULL)
  638.     {
  639.         debug_str("Badly-formed named string");
  640.         goto BadEndTag;
  641.     }
  642.     
  643.     outParseInfo.fValueStrLen = endTag - outParseInfo.fValueStr;
  644.     outParseInfo.fNamedStringLimit = endTag + 4;
  645.  
  646.     check(outParseInfo.fNamedStringLimit > outParseInfo.fNamedStringStart);
  647.  
  648.     return true;
  649.  
  650. BadEndTag:
  651. BadStringName:
  652.     outParseInfo.fNamedStringLimit = outParseInfo.fNamedStringStart + 256; // doesn't have an end, so assign one
  653.     outParseInfo.fValidNamedString = false;
  654.     return false;
  655. }
  656.  
  657.  
  658. /*------------------------------------------------------------------
  659.     CreateNewZString
  660. ------------------------------------------------------------------*/
  661.  
  662. void
  663. ZStringParser::CreateNewZString(
  664.     const ZStringParseInfo &    inParseInfo,
  665.     ZString &                    outDestString)
  666. {
  667.     const char *    curInputPtr;
  668.     const char *    limitInputPtr;
  669.     Z_UInt16        outputCount = 0;
  670.  
  671.     // First, scan the string to validate it and count
  672.     // the number of characters we'll need.
  673.     curInputPtr = inParseInfo.fValueStr;
  674.     limitInputPtr = curInputPtr + inParseInfo.fValueStrLen;
  675.  
  676.     while (curInputPtr < limitInputPtr)
  677.     {
  678.         if (*curInputPtr == '&' || *curInputPtr == '<')
  679.         {
  680.             Z_UInt16 tagNameLength;
  681.             
  682.             ZStringTagID tagID = LookUpTagID(curInputPtr, limitInputPtr, tagNameLength);
  683.             check(tagID != kZTag_Invalid);
  684.             
  685.             if (tagID == kZTag_Invalid)
  686.             {
  687.                 curInputPtr++;
  688.                 outputCount++;
  689.             }
  690.             else if (tagID == kZTag_replace)
  691.             {
  692.                 // We special-case the "replace" tag because
  693.                 // it doesn't get replaced, and it has two additional 
  694.                 // characters.
  695.                 curInputPtr += 8;
  696.                 check(curInputPtr[0] >= '0' && curInputPtr[0] <= '9');
  697.                 check(curInputPtr[1] >= '0' && curInputPtr[1] <= '9');
  698.                 curInputPtr += 2;
  699.                 check(curInputPtr <= limitInputPtr);
  700.                 outputCount += 10;
  701.             }
  702.             else
  703.             {
  704.                 curInputPtr += tagNameLength;
  705.                 outputCount += GetTagReplacement(tagID, NULL);
  706.             }
  707.         }
  708.         else
  709.         {
  710.             outputCount += mTwoByteTable[(Z_UInt8)*curInputPtr] + 1;
  711.             curInputPtr += mTwoByteTable[(Z_UInt8)*curInputPtr] + 1;
  712.         }
  713.     }
  714.  
  715.     // Allocate the space for the string.
  716.     outDestString.AllocateData(outputCount);
  717.  
  718.     if (outDestString.GetData() != NULL)
  719.     {
  720.         // Next, do a second scan to replace the characters.
  721.         curInputPtr = inParseInfo.fValueStr;
  722.         limitInputPtr = curInputPtr + inParseInfo.fValueStrLen;
  723.  
  724.         char * curOutputPtr = outDestString.GetData()->GetDataArray();
  725.  
  726.         while (curInputPtr < limitInputPtr)
  727.         {
  728.             if (*curInputPtr == '&' || *curInputPtr == '<')
  729.             {
  730.                 Z_UInt16 tagNameLength;
  731.                 
  732.                 ZStringTagID tagID = LookUpTagID(curInputPtr, limitInputPtr, tagNameLength);
  733.                 
  734.                 if (tagID == kZTag_Invalid)
  735.                 {
  736.                     *curOutputPtr++ = *curInputPtr++;
  737.                 }
  738.                 else if (tagID == kZTag_replace)
  739.                 {
  740.                     // We special-case the "replace" tag because
  741.                     // it doesn't get replaced, and it has two additional 
  742.                     // characters.
  743.                     memcpy(curOutputPtr, curInputPtr, 10);
  744.                     curOutputPtr += 10;
  745.                     curInputPtr += 10;
  746.                 }
  747.                 else
  748.                 {
  749.                     curOutputPtr += GetTagReplacement(tagID, curOutputPtr);
  750.                     curInputPtr += tagNameLength;
  751.                 }
  752.             }
  753.             else
  754.             {
  755.                 Z_UInt8        charCount = mTwoByteTable[(Z_UInt8)*curInputPtr] + 1;
  756.                 
  757.                 while (charCount > 0)
  758.                 {
  759.                     *curOutputPtr++ = *curInputPtr++;
  760.                     charCount--;
  761.                 }
  762.             }
  763.         }
  764.     }
  765.     
  766.     // Register the string with the dictionary so
  767.     // we don't have to continue parsing it each time.
  768.     ZStringDictionary::GetZStringDictionary().RegisterString(inParseInfo, outDestString);
  769. }
  770.  
  771.  
  772. /*------------------------------------------------------------------
  773.     ConvertNamedStringToTag                        [static]
  774.     
  775.     This method converts the parsed named string to a named string
  776.     that contains only tags.
  777. ------------------------------------------------------------------*/
  778.  
  779. Z_Boolean
  780. ZStringParser::ConvertNamedStringToTag(
  781.     const ZStringParseInfo &    inParseInfo,
  782.     const ZToolOptions &        inOptions,
  783.     ZString &                    outDestString,
  784.     ZParserWarningType &        outWarningType)
  785. {
  786.     check(inParseInfo.fValidNamedString);
  787.  
  788.     // Initialize the warning type to none
  789.     outWarningType = kZParser_NoWarnings;
  790.     
  791.     // Copy the portion of the named string before the actual string prototype
  792.     outDestString.SetString(inParseInfo.fNamedStringStart, inParseInfo.fValueStr - inParseInfo.fNamedStringStart);
  793.     
  794.     // Now, copy the prototype one character at a time until we hit a tag
  795.     const char *        curInputPtr;
  796.     char                tagString[32];
  797.     
  798.     curInputPtr = inParseInfo.fValueStr;
  799.     
  800.     while (curInputPtr < inParseInfo.fValueStr + inParseInfo.fValueStrLen)
  801.     {
  802.         if (*curInputPtr == '&' || *curInputPtr == '<')
  803.         {
  804.             ZStringTagID            tagID;
  805.             Z_UInt16                tagLength;
  806.             Z_Boolean                isNumeric;
  807.  
  808.             isNumeric = (curInputPtr[0] == '&' && curInputPtr[1] == '#');
  809.  
  810.             tagID = LookUpTagID(curInputPtr, inParseInfo.fValueStr + inParseInfo.fValueStrLen, tagLength);
  811.             
  812.             check(tagID != kZTag_Invalid);
  813.             if (tagID == kZTag_Invalid)
  814.             {
  815.                 return false;
  816.             }
  817.             else if (tagID == kZTag_replace)
  818.             {
  819.                 ZString        replaceString;
  820.                 
  821.                 replaceString.SetString(curInputPtr, tagLength);
  822.                 outDestString += replaceString;
  823.                 curInputPtr += tagLength;
  824.             }
  825.             else if (*curInputPtr == '<')
  826.             {
  827.                 memcpy(tagString, curInputPtr, tagLength);
  828.                 tagString[tagLength] = '\0';
  829.                 outDestString += tagString;
  830.                 curInputPtr += tagLength;
  831.             }
  832.             else if (inOptions.mOutputNumericTags)        // Output numeric tags
  833.             {
  834.                 sprintf(tagString, "&#%.3d;", ConvertTagIDToNumeric(tagID));
  835.                 outDestString += tagString;
  836.                 curInputPtr += tagLength;
  837.             }
  838.             else                                        // Output alpha tags
  839.             {
  840.                 sprintf(tagString, "%s", ConvertTagIDToString(tagID));
  841.                 outDestString += tagString;
  842.                 curInputPtr += tagLength;
  843.             }
  844.  
  845.             if (!isNumeric && inOptions.mAllowTagSemicolon && *curInputPtr == ';')  
  846.             {
  847.                 // for alphabetic tags, skips over ;'s at the end if option is enabled
  848.                 curInputPtr++;
  849.             }
  850.         }
  851.         else 
  852.         {
  853.             unsigned char charValue = (unsigned char)(*curInputPtr);
  854.             if (charValue > 127 && inOptions.mConvertHighASCIIChar)        // if we want to try to convert high ASCII characters
  855.             {
  856.                 ZStringTagID    zStringTag = sNumericParseTags[charValue];
  857.                 if (zStringTag != kZTag_Invalid)
  858.                 {
  859.                     // Translate into html code (based on numeric or alpha option)
  860.                     if (inOptions.mOutputNumericTags)
  861.                         sprintf(tagString, "&#%.3d;", ConvertTagIDToNumeric(zStringTag));
  862.                     else
  863.                         sprintf(tagString, "%s", ConvertTagIDToString(zStringTag));
  864.                         
  865.                     outDestString += tagString;                // Add the tag to the stream
  866.                     curInputPtr++;
  867.                     outWarningType |= kZParser_ChangedString;
  868.                 }
  869.                 else                                          // Append the character to the output string
  870.                 {
  871.                     outDestString += *curInputPtr++;
  872.                     outWarningType |= kZParser_HasHighASCII;
  873.                 }
  874.             }
  875.             else if (inOptions.mOutputWarnings)        // Only check if we are printing warnings
  876.             {
  877.                 if (FindPossibleTag(curInputPtr, inParseInfo)) 
  878.                     outWarningType |= kZParser_FoundPossibleTag;
  879.                 outDestString += *curInputPtr++;
  880.             }
  881.             else
  882.                 outDestString += *curInputPtr++;
  883.         }
  884.     }
  885.     
  886.     // Finally, copy the rest of the named string
  887.     ZString        stringEnd;
  888.     stringEnd.SetString(inParseInfo.fValueStr + inParseInfo.fValueStrLen, 
  889.             inParseInfo.fNamedStringLimit - (inParseInfo.fValueStr + inParseInfo.fValueStrLen));
  890.     outDestString += stringEnd;
  891.  
  892.     return true;
  893. }
  894.  
  895.  
  896. /*------------------------------------------------------------------
  897.     FindPossibleTag
  898.  
  899.     This method searches the parse tags to see if one of them matches
  900.     the current input. It returns true if it does and false otherwise.
  901. ------------------------------------------------------------------*/
  902.  
  903. Z_Boolean
  904. ZStringParser::FindPossibleTag(
  905.     const char *                inCurInputPtr,
  906.     const ZStringParseInfo &    inParseInfo)
  907. {
  908.     Z_UInt32        strLength;
  909.     Z_UInt32        curTagIndex;
  910.     Z_UInt32        totalEntries = sizeof(sParseTags) / sizeof(ZStringParseTag);
  911.  
  912.     if (!isalpha(*inCurInputPtr))
  913.         return false;
  914.     
  915.     // Scan through the list of numerics for possible match
  916.     for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++)
  917.     {
  918.         // We are ignoring the tags that could be stand-alone words or parts of words
  919.         if (!sParseTags[curTagIndex].fCheckForPartialTag)
  920.             continue;
  921.  
  922.         strLength = strlen(sParseTags[curTagIndex].fTagName) - 1;    // we are ignoring the initial &
  923.         if ((inCurInputPtr + strLength) <= inParseInfo.fValueStr + inParseInfo.fValueStrLen)
  924.         {
  925.             if (strncmp(inCurInputPtr, (sParseTags[curTagIndex].fTagName+1), strLength)==0)
  926.                 if (sParseTags[curTagIndex].fTagID == kZTag_replace)
  927.                 {
  928.                     if (inCurInputPtr[7] >= '0' && inCurInputPtr[7] <= '9' &&
  929.                         inCurInputPtr[8] >= '0' && inCurInputPtr[8] <= '9')
  930.                         return true;
  931.                 }
  932.                 else
  933.                     return true;
  934.         }
  935.     }
  936.     return false;
  937. }
  938.  
  939.  
  940. /*------------------------------------------------------------------
  941.     CheckDataLength
  942. ------------------------------------------------------------------*/
  943.  
  944. Z_Boolean 
  945. ZStringParser::CheckDataLength(
  946.     const ZStringParseInfo &    inParseInfo)
  947. {
  948.     Z_UInt16        stringLen = inParseInfo.fValueStrLen;
  949.     const char *    curPtr;
  950.     char *            searchString = new char[stringLen+1];
  951.     char *            endOfStringPtr = searchString + stringLen;
  952.  
  953.     strncpy(searchString, inParseInfo.fValueStr, stringLen);
  954.     searchString[stringLen] = '\0';
  955.     curPtr = searchString;
  956.  
  957.     while (curPtr < endOfStringPtr)
  958.     {
  959.         curPtr = strpbrk(curPtr, "&");
  960.         if (curPtr == NULL)                // No tag found so break
  961.             break;
  962.  
  963.         // if it is numeric, it is a set length of 6 characters (ex: {)
  964.         if ((curPtr+1) < endOfStringPtr && curPtr[1] == '#')
  965.         {
  966.             stringLen -= 5;                // 6 characters represent 1 letter, so only subtract (6-1)=5
  967.         }
  968.         else                            // It is alpha tag
  969.         {
  970.             ZStringTagID            tagID;
  971.             Z_UInt16                tagLength;
  972.  
  973.             tagID = LookUpTagID(curPtr, inParseInfo.fValueStr + inParseInfo.fValueStrLen, tagLength);
  974.             if (tagID != kZTag_Invalid)    // No tag ID found, so ignore this tag.
  975.                 stringLen -= (tagLength - 1);
  976.         }
  977.         curPtr++;
  978.     }
  979.  
  980.     delete [] searchString;
  981.  
  982.     return (stringLen > inParseInfo.fMaxDataLen);
  983. }
  984.  
  985.  
  986. /*------------------------------------------------------------------
  987.     CompareTagStrings
  988. ------------------------------------------------------------------*/
  989.  
  990. Z_SInt32
  991. ZStringParser::CompareTagStrings(
  992.     const char *                inTagFromTable,
  993.     const char *                inParseString,
  994.     const char *                inParseStringLimit)
  995. {
  996.     while (*inTagFromTable != '\0' &&
  997.         inParseString < inParseStringLimit)
  998.     {
  999.         if (*inTagFromTable < *inParseString)
  1000.             return -1;
  1001.         else if (*inTagFromTable > *inParseString)
  1002.             return 1;
  1003.         
  1004.         inTagFromTable++;
  1005.         inParseString++;
  1006.     }
  1007.     
  1008.     return 0;
  1009. }
  1010.  
  1011.  
  1012. /*------------------------------------------------------------------
  1013.     ConvertTagIDToNumeric
  1014. ------------------------------------------------------------------*/
  1015.  
  1016. Z_UInt32
  1017. ZStringParser::ConvertTagIDToNumeric(
  1018.     ZStringTagID                inTag)
  1019. {
  1020.     // The replace tag has no equivalent
  1021.     check(inTag != kZTag_Invalid && inTag != kZTag_replace);
  1022.     
  1023.     Z_UInt32            curTagIndex;
  1024.     Z_UInt32            totalEntries = sizeof(sNumericParseTags) / sizeof(ZStringTagID);
  1025.     
  1026.     // Scan through the entire list of numerics
  1027.     // for this tag.
  1028.     for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++)
  1029.     {
  1030.         if (sNumericParseTags[curTagIndex] == inTag)
  1031.             return curTagIndex;
  1032.     }
  1033.  
  1034.     debug_str("Didn't find tag");
  1035.  
  1036.     return 0;
  1037. }
  1038.  
  1039.  
  1040. /*------------------------------------------------------------------
  1041.     ConvertTagIDToString
  1042. ------------------------------------------------------------------*/
  1043.  
  1044. const char *
  1045. ZStringParser::ConvertTagIDToString(
  1046.     ZStringTagID                inTag)
  1047. {
  1048.     // The replace tag has no equivalent
  1049.     check(inTag != kZTag_Invalid && inTag != kZTag_replace);
  1050.     
  1051.     Z_UInt32            curTagIndex;
  1052.     Z_UInt32            totalEntries = sizeof(sParseTags) / sizeof(ZStringParseTag);
  1053.     
  1054.     // Scan through the entire list of numerics
  1055.     // for this tag.
  1056.     for (curTagIndex = 0; curTagIndex < totalEntries; curTagIndex++)
  1057.     {
  1058.         if (sParseTags[curTagIndex].fTagID == inTag)
  1059.             return sParseTags[curTagIndex].fTagName;
  1060.     }
  1061.  
  1062.     debug_str("Didn't find tag");
  1063.  
  1064.     return NULL;
  1065. }
  1066.  
  1067.  
  1068. /*------------------------------------------------------------------
  1069.     LookUpTagID                                            [static]
  1070.     
  1071.     Looks up a tag starting with an ampersand ("&") or a 
  1072.     left bracket ("<").
  1073. ------------------------------------------------------------------*/
  1074.  
  1075. ZStringTagID
  1076. ZStringParser::LookUpTagID(
  1077.     const char *                inStartOfTag,
  1078.     const char *                inStringLimit,
  1079.     Z_UInt16 &                    outTagNameLength)
  1080. {
  1081.     ZStringTagID    zStringTag = kZTag_Invalid;
  1082.     
  1083.     // Make sure I didn't forget to type an entry
  1084.     check(sizeof(sNumericParseTags) == 256 * sizeof(ZStringTagID));
  1085.     
  1086.     check(inStartOfTag[0] == '&' || inStartOfTag[0] == '<');
  1087.  
  1088.     // Is it a numeric tag?
  1089.     if (inStartOfTag[0] == '&' && inStartOfTag[1] == '#')
  1090.     {
  1091.         Z_UInt32        numericTagValue = 0;
  1092.         
  1093.         // There should be at least five characters in the tag
  1094.         check(inStringLimit >= inStartOfTag + 6);
  1095.         if (inStringLimit < inStartOfTag + 6)
  1096.             return kZTag_Invalid;
  1097.         
  1098.         check(inStartOfTag[2] >= '0' && inStartOfTag[2] <= '9');
  1099.         if (inStartOfTag[2] < '0' || inStartOfTag[2] > '9')
  1100.             return kZTag_Invalid;
  1101.         numericTagValue = (inStartOfTag[2] - '0') * 100;
  1102.         
  1103.         check(inStartOfTag[3] >= '0' && inStartOfTag[3] <= '9');
  1104.         if (inStartOfTag[3] < '0' || inStartOfTag[3] > '9')
  1105.             return kZTag_Invalid;
  1106.         numericTagValue += (inStartOfTag[3] - '0') * 10;
  1107.  
  1108.         check(inStartOfTag[4] >= '0' && inStartOfTag[4] <= '9');
  1109.         if (inStartOfTag[4] < '0' || inStartOfTag[4] > '9')
  1110.             return kZTag_Invalid;
  1111.         numericTagValue += (inStartOfTag[4] - '0');
  1112.         
  1113.         // It should end in a semicolon
  1114.         check(inStartOfTag[5] == ';');
  1115.         if (inStartOfTag[5] != ';')
  1116.             return kZTag_Invalid;
  1117.         
  1118.         if (numericTagValue >= 256)
  1119.             return kZTag_Invalid;
  1120.         
  1121.         zStringTag = sNumericParseTags[numericTagValue];
  1122.         check(zStringTag != kZTag_Invalid);
  1123.         
  1124.         if (zStringTag != kZTag_Invalid)
  1125.             outTagNameLength = 6;
  1126.     }
  1127.     else
  1128.     {
  1129.         Z_UInt32        minEntryIndex = 0;
  1130.         Z_UInt32        maxEntryIndex = sizeof(sParseTags) / sizeof(ZStringParseTag);
  1131.         Z_UInt32        curEntryIndex;
  1132.         Z_UInt32        maxNumberOfEntries = maxEntryIndex;
  1133.             
  1134.         while (true)
  1135.         {
  1136.             // Cut the search in half.
  1137.             curEntryIndex = (minEntryIndex + maxEntryIndex) / 2;
  1138.  
  1139.             if (curEntryIndex > maxNumberOfEntries)
  1140.                 return kZTag_Invalid;
  1141.  
  1142.             Z_SInt32 compareResult;
  1143.             compareResult = CompareTagStrings(
  1144.                     sParseTags[curEntryIndex].fTagName,
  1145.                     inStartOfTag, 
  1146.                     inStringLimit);
  1147.         
  1148.             if (compareResult == 0)
  1149.             {
  1150.                 Z_UInt32 tagNameLength = strlen(sParseTags[curEntryIndex].fTagName);
  1151.                 
  1152.                 // Make sure we didn't just swallow a portion
  1153.                 // of one of the tag names.
  1154.                 if (tagNameLength <= inStringLimit - inStartOfTag)
  1155.                 {
  1156.                     zStringTag = sParseTags[curEntryIndex].fTagID;
  1157.                     outTagNameLength = tagNameLength;
  1158.                 }
  1159.                 break;
  1160.             }
  1161.             else if (compareResult > 0)
  1162.             {
  1163.                 // The tag was less than the value in the table,
  1164.                 // so we need to search further up the table.
  1165.                 maxEntryIndex = curEntryIndex - 1;
  1166.             }
  1167.             else // if (compareResult < 0)
  1168.             {
  1169.                 // The tag was greater than the value in the table,
  1170.                 // so we need to search further down the table.
  1171.                 minEntryIndex = curEntryIndex + 1;
  1172.             }
  1173.  
  1174.             // If we converged, but didn't find a match,
  1175.             // break out of the loop.
  1176.             if (maxEntryIndex < minEntryIndex)
  1177.                 break;
  1178.         }
  1179.     }
  1180.     
  1181.     return zStringTag;
  1182. }
  1183.  
  1184.     
  1185.  
  1186.